library(phyloseq) # for phyloseq object
library(ggplot2)
library(ggsignif)
library(cowplot)
library(tidyverse)
library("plotly") # plot 3D
library("microbiome") # for centered log-ratio
library("coda") # Aitchison distance
library("coda.base") # Aitchison distance
library("vegan") # NMDS
library(pheatmap) # for heatmap
# Set path
path <- "~/Projects/IBS_Meta-analysis_16S"
path.plots <- file.path(path, "data/analysis-individual/PLOTS/plots-LoPresti-EDA")
# Import phyloseq object
physeq.lopresti <- readRDS(file.path(path, "phyloseq-objects/physeq_lopresti.rds"))
# Sanity check
physeq.lopresti
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 806 taxa and 57 samples ]
## sample_data() Sample Data: [ 57 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 806 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 806 tips and 801 internal nodes ]
## refseq() DNAStringSet: [ 806 reference sequences ]
Phylogenetic tree was computed with the package phangorn, and the script was run on a cluster. Let’s check we have correctly generated a phylogenetic tree.
# Look at the tree
plot_tree(physeq.lopresti, color = "Phylum", ladderize="left")
This dataset has several covariates (gender, age, bmi, sample_storage_duration). We will check whether there is the same distribution of these covariates between healthy and IBS patients.
# Number of individuals in each group (keeping just 1 sample per individual)
metadata <- data.frame(sample_data(physeq.lopresti)) %>%
select(host_disease, host_age, host_sex, host_ID) %>%
group_by(host_ID) %>%
summarise_all(first)
metadata %>%
count(host_disease)
# Age
metadata %>%
group_by(host_disease) %>%
summarize(mean_age=mean(host_age), sd_age=sd(host_age))
wilcox.test(metadata[metadata$host_disease == "IBS", ]$host_age,
metadata[metadata$host_disease == "Healthy", ]$host_age) # p=0.25
##
## Wilcoxon rank sum test with continuity correction
##
## data: metadata[metadata$host_disease == "IBS", ]$host_age and metadata[metadata$host_disease == "Healthy", ]$host_age
## W = 281, p-value = 0.2536
## alternative hypothesis: true location shift is not equal to 0
# Gender
metadata %>%
count(host_disease, host_sex)
chisq.test(data.frame("Female" = c(12,17),
"Male" = c(18,6))) # p=0.02 -> more females than males in IBS group
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data.frame(Female = c(12, 17), Male = c(18, 6))
## X-squared = 4.7518, df = 1, p-value = 0.02927
# Plot Phylum
plot_bar(physeq.lopresti, fill = "Phylum") + facet_wrap("sample_type", scales="free_x") +
theme(axis.text.x = element_text(size = 5))+
labs(x = "Samples", y = "Absolute abundance", title = "Lo Presti dataset (2019)")+
theme_cowplot()+
theme(axis.text.x = element_text(size=6, angle=90))
# ggsave(file.path(path.plots, "absAbundance_phylum.jpg"), width=13, height=5)
# Plot Class
plot_bar(physeq.lopresti, fill = "Class")+ facet_wrap("sample_type", scales="free_x") +
theme(axis.text.x = element_text(size = 5))+
labs(x = "Samples", y = "Absolute abundance", title = "Lo Presti dataset (2019)")+
theme_cowplot()+
theme(axis.text.x = element_text(size=6, angle=90))
Sequencing depth characteristics of the Hugerth dataset:
- minimum of 504 total count per sample
- median: 897 total count per sample
- maximum of 3838 total count per sample
# Agglomerate to phylum & class levels
phylum.table <- physeq.lopresti %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() # Melt to long format
class.table <- physeq.lopresti %>%
tax_glom(taxrank = "Class") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt()
# Plot relative abundances
ggplot(phylum.table, aes(x = reorder(Sample, Sample, function(x) mean(phylum.table[Sample == x & Phylum == 'Bacteroidota', 'Abundance'])),
y = Abundance, fill = Phylum))+
facet_wrap(~ sample_type + host_disease, scales = "free") + # scales = "free" removes empty lines
geom_bar(stat = "identity") +
theme(axis.text.x = element_blank())+
labs(x = "Samples", y = "Relative abundance", title = "Lo Presti dataset (2019)")
# ggsave(file.path(path.plots, "relAbundance_phylum.jpg"), width=10, height=5)
ggplot(class.table, aes(x = reorder(Sample, Sample, function(x) mean(class.table[Sample == x & Phylum == 'Bacteroidota', 'Abundance'])),
y = Abundance, fill = Class))+
facet_wrap(~ sample_type + host_disease, scales = "free") +
geom_bar(stat = "identity") +
theme(axis.text.x = element_blank(),
legend.key.size = unit(0.2, 'cm'),
legend.text = element_text(size=8))+
labs(x = "Samples", y = "Relative abundance", title = "Lo Presti dataset (2019)")
# ggsave(file.path(path.plots, "relAbundance_class.jpg"), width=12, height=5)
# Extract abundance of only Bacteroidota and Firmicutes
relevant.covariates <- c('Sample', 'Abundance', 'host_disease', 'host_subtype', 'Phylum', 'sample_type', 'host_ID', 'host_age', 'host_sex')
bacter <- phylum.table %>%
filter(Phylum == "Bacteroidota") %>%
select(all_of(relevant.covariates)) %>%
dplyr::rename(Bacteroidota = Abundance) %>%
select(-Phylum)
firmi <- phylum.table %>%
filter(Phylum == "Firmicutes") %>%
select(all_of(relevant.covariates)) %>%
dplyr::rename(Firmicutes = Abundance) %>%
select(-Phylum)
# Calculate log2 ratio Firmicutes/Bacteroidota
ratio.FB <- left_join(x=bacter, y=firmi, by=c('Sample', 'host_disease', 'host_subtype', 'sample_type', 'host_ID', 'host_age', 'host_sex')) %>%
relocate(Firmicutes, .after=Bacteroidota) %>%
# Compute log ratios
mutate(logRatioFB = log2(Firmicutes/Bacteroidota))
# Plot log2 ratio Firmicutes/Bacteroidota
ggplot(ratio.FB, aes(x = host_disease, y = logRatioFB))+
geom_violin(aes(fill=host_disease))+
scale_fill_manual(values=scales::alpha(c("blue", "red"), .3))+
geom_jitter(width=0.1)+
facet_wrap(~sample_type) +
geom_signif(comparisons = list(c("Healthy", "IBS")), map_signif_level = TRUE, test="wilcox.test") +
labs(x = "", y = 'Log2(Firmicutes/Bacteroidota)', title = "Firmicutes:Bacteroidota ratio")+
theme_cowplot()+
theme(legend.position="none")
# ggsave(file.path(path.plots, "ratioFB.jpg"), width=5, height=5)
# Statistical test sigmoid samples
wilcox.test(ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "IBS","logRatioFB"],
ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "Healthy","logRatioFB"]) # p = 0.5
##
## Wilcoxon rank sum test with continuity correction
##
## data: ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "IBS", "logRatioFB"] and ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "Healthy", "logRatioFB"]
## W = 18, p-value = 0.5074
## alternative hypothesis: true location shift is not equal to 0
# Statistical test stool samples
wilcox.test(ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "IBS","logRatioFB"],
ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "Healthy","logRatioFB"]) # p=0.01 significant difference in stool samples
##
## Wilcoxon rank sum test with continuity correction
##
## data: ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "IBS", "logRatioFB"] and ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "Healthy", "logRatioFB"]
## W = 143.5, p-value = 0.01208
## alternative hypothesis: true location shift is not equal to 0
# Plot log2 ratio Firmicutes/Bacteroidota per IBS subtype
ggplot(ratio.FB %>% filter(sample_type == "stool"), aes(x = host_subtype, y = logRatioFB))+
geom_violin(aes(fill=host_subtype))+
scale_fill_manual(values=scales::alpha(c("blue", "brown", "red", "orange"), .3))+
geom_jitter(width=0.1)+
geom_signif(comparisons = list(c("HC", "IBS-C"), c("HC", "IBS-D"), c("HC", "IBS-M")),
map_signif_level = TRUE, test="wilcox.test", step_increase=0.1) +
labs(x = "", y = 'Log2(Firmicutes/Bacteroidota)', title = "Fecal samples")+
theme_cowplot()+
theme(legend.position="none")
# ggsave(file.path(path.plots, "ratioFB_subtype.jpg"), width=5, height=5)
# Paired data
ggplot(ratio.FB, aes(x = sample_type, y = logRatioFB))+
geom_violin(aes(fill=host_disease))+
scale_fill_manual(values=scales::alpha(c("blue", "red"), .3))+
geom_point()+
geom_line(aes(group=host_ID), lwd=0.1) +
facet_wrap(~host_disease) +
labs(x = "", y = 'Log2(Firmicutes/Bacteroidota)', title = "Paired data") +
theme_cowplot()+
theme(legend.position="none")
# ggsave(file.path(path.plots, "ratioFB_paired-data.jpg"), width=5, height=5)
# Statistical test sigmoid vs stool samples
wilcox.test(ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "Healthy","logRatioFB"],
ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "Healthy","logRatioFB"]) # p=0.001
##
## Wilcoxon rank sum test with continuity correction
##
## data: ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "Healthy", "logRatioFB"] and ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "Healthy", "logRatioFB"]
## W = 172, p-value = 0.001039
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "IBS","logRatioFB"],
ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "IBS","logRatioFB"]) # p=0.04
##
## Wilcoxon rank sum exact test
##
## data: ratio.FB[ratio.FB$sample_type == "stool" & ratio.FB$host_disease == "IBS", "logRatioFB"] and ratio.FB[ratio.FB$sample_type == "sigmoid" & ratio.FB$host_disease == "IBS", "logRatioFB"]
## W = 63, p-value = 0.04382
## alternative hypothesis: true location shift is not equal to 0
# Sanity check no sample with less than 500 total count
table(sample_sums(physeq.lopresti)<500) # all FALSE
#____________________________________________________________________
# PHYLOSEQ OBJECT WITH NON-ZERO COMPOSITIONS
physeq.NZcomp <- physeq.lopresti
otu_table(physeq.NZcomp)[otu_table(physeq.NZcomp) == 0] <- 0.5 # pseudocounts
# Sanity check that 0 values have been replaced
# otu_table(physeq.lopresti)[1:5,1:5]
# otu_table(physeq.NZcomp)[1:5,1:5]
# transform into compositions
physeq.NZcomp <- transform_sample_counts(physeq.NZcomp, function(x) x / sum(x) )
table(rowSums(otu_table(physeq.NZcomp))) # check if there is any row not summing to 1
# Save object
saveRDS(physeq.NZcomp, file.path(path, "data/analysis-individual/LoPresti-2019/02_EDA-LoPresti/physeq_NZcomp.rds"))
#____________________________________________________________________
# PHYLOSEQ OBJECT WITH RELATIVE COUNT (BETWEEN 0 AND 1)
physeq.rel <- physeq.lopresti
physeq.rel <- transform_sample_counts(physeq.rel, function(x) x / sum(x) )
# check the counts are all relative
# otu_table(physeq.lopresti)[1:5, 1:5]
# otu_table(physeq.rel)[1:5, 1:5]
# sanity check
table(rowSums(otu_table(physeq.rel))) # check if there is any row not summing to 1
# save the physeq.rel object
saveRDS(physeq.rel, file.path(path, "data/analysis-individual/LoPresti-2019/02_EDA-LoPresti/physeq_relative.rds"))
#____________________________________________________________________
# PHYLOSEQ OBJECT WITH COMMON-SCALE NORMALIZATION
physeq.CSN <- physeq.lopresti
physeq.CSN <- transform_sample_counts(physeq.CSN, function(x) (x*min(sample_sums(physeq.CSN))) / sum(x) )
# sanity check
table(rowSums(otu_table(physeq.CSN))) # check that all rows are summing to the same total
# save the physeq.CSN object
saveRDS(physeq.CSN, file.path(path, "data/analysis-individual/LoPresti-2019/02_EDA-LoPresti/physeq_CSN.rds"))
#____________________________________________________________________
# PHYLOSEQ OBJECT WITH CENTERED LOG RATIO COUNT
physeq.clr <- physeq.lopresti
physeq.clr <- microbiome::transform(physeq.lopresti, "clr") # the function adds pseudocounts itself
# Compare the otu tables in the original phyloseq object and the new one after CLR transformation
# otu_table(physeq.lopresti)[1:5, 1:5] # should contain absolute counts
# otu_table(physeq.clr)[1:5, 1:5] # should all be relative
# save the physeq.rel object
saveRDS(physeq.clr, file.path(path, "data/analysis-individual/LoPresti-2019/02_EDA-LoPresti/physeq_clr.rds"))
First, let’s look at these four distances of interest.
#____________________________________________________________________________________
# Measure distances
getDistances <- function(relPhyseq=physeq.rel, clrPhyseq=physeq.clr, csnPhyseq=physeq.CSN, nzcompPhyseq=physeq.NZcomp){
# sanity check
cat("nb samples relPhyseq:", nsamples(relPhyseq), "\n")
cat("nb samples clrPhyseq:", nsamples(clrPhyseq), "\n")
cat("nb samples csnPhyseq:", nsamples(csnPhyseq), "\n")
cat("nb samples nzcompPhyseq:", nsamples(nzcompPhyseq), "\n")
# Compute distances
print("Unifrac...")
set.seed(123) # for unifrac, need to set a seed
glom.UniF <- UniFrac(relPhyseq, weighted=TRUE, normalized=TRUE) # weighted unifrac
print("Aitchison...")
glom.ait <- phyloseq::distance(clrPhyseq, method = 'euclidean') # aitchison
print("Bray des bois...")
glom.bray <- phyloseq::distance(csnPhyseq, method = "bray") # bray-curtis
print("Canberra <3...")
glom.can <- phyloseq::distance(nzcompPhyseq, method = "canberra") # canberra
dist.list <- list("UniF" = glom.UniF, "Ait" = glom.ait, "Canb" = glom.can, "Bray" = glom.bray)
return(dist.list)
}
#____________________________________________________________________________________
# Plot in 2D the distances
plotDistances2D <- function(dlist, ordination="MDS", relPhyseq=physeq.rel, clrPhyseq=physeq.clr, csnPhyseq=physeq.CSN, nzcompPhyseq=physeq.NZcomp){
plist <- NULL
plist <- vector("list", 4)
names(plist) <- c("Weighted Unifrac", "Aitchison", "Bray-Curtis", "Canberra")
print("Unifrac")
# Weighted UniFrac
set.seed(123)
iMDS.UniF <- ordinate(relPhyseq, ordination, distance=dlist$UniF)
plist[[1]] <- plot_ordination(relPhyseq, iMDS.UniF, color="host_disease")
print("Aitchison")
# Aitchison
set.seed(123)
iMDS.Ait <- ordinate(clrPhyseq, ordination, distance=dlist$Ait)
plist[[2]] <- plot_ordination(clrPhyseq, iMDS.Ait, color="host_disease")
print("Bray")
# Bray-Curtis
set.seed(123)
iMDS.Bray <- ordinate(csnPhyseq, ordination, distance=dlist$Bray)
plist[[3]] <- plot_ordination(csnPhyseq, iMDS.Bray, color="host_disease")
print("Canberra")
# Canberra
set.seed(123)
iMDS.Can <- ordinate(nzcompPhyseq, ordination, distance=dlist$Can)
plist[[4]] <- plot_ordination(nzcompPhyseq, iMDS.Can, color="host_disease")
# Creating a dataframe to plot everything
plot.df = plyr::ldply(plist, function(x) x$data)
names(plot.df)[1] <- "distance"
return(plot.df)
}
Now let’s plot!
#________________
# FECAL DATA
#________________
# Get the distances & the plot data
dist.lopresti.fecal <- getDistances(relPhyseq = subset_samples(physeq.rel, sample_type=="stool"),
clrPhyseq = subset_samples(physeq.clr, sample_type=="stool"),
csnPhyseq = subset_samples(physeq.CSN, sample_type=="stool"),
nzcompPhyseq = subset_samples(physeq.NZcomp, sample_type=="stool"))
## nb samples relPhyseq: 46
## nb samples clrPhyseq: 46
## nb samples csnPhyseq: 46
## nb samples nzcompPhyseq: 46
## [1] "Unifrac..."
## Warning in UniFrac(relPhyseq, weighted = TRUE, normalized = TRUE): Randomly
## assigning root as -- ASV415 -- in the phylogenetic tree in the data you
## provided.
## Warning in matrix(tree$edge[order(tree$edge[, 1]), ][, 2], byrow = TRUE, : data
## length [1609] is not a sub-multiple or multiple of the number of rows [805]
## [1] "Aitchison..."
## [1] "Bray des bois..."
## [1] "Canberra <3..."
# Error message: "data length [1609] is not a sub-multiple or multiple of the number of rows [805]"
# Let's check if some nodes have more than 2 children, which could explain why there is an odd number of rows in the edge matrix
edges <- phy_tree(physeq.rel)$edge
mycounts <- table(edges[,1])
length(mycounts[mycounts==2])
## [1] 801
length(mycounts[mycounts!=2])
## [1] 2
mycounts[mycounts!=2] # There are 2 nodes with 3 children
##
## 807 1600
## 3 3
# Let's get a dichotomy tree to avoid this error
phy_tree(physeq.rel) <- ape::multi2di(phy_tree(physeq.rel))
# Re-run the distances
dist.lopresti.fecal <- getDistances(relPhyseq = subset_samples(physeq.rel, sample_type=="stool"),
clrPhyseq = subset_samples(physeq.clr, sample_type=="stool"),
csnPhyseq = subset_samples(physeq.CSN, sample_type=="stool"),
nzcompPhyseq = subset_samples(physeq.NZcomp, sample_type=="stool"))
## nb samples relPhyseq: 46
## nb samples clrPhyseq: 46
## nb samples csnPhyseq: 46
## nb samples nzcompPhyseq: 46
## [1] "Unifrac..."
## [1] "Aitchison..."
## [1] "Bray des bois..."
## [1] "Canberra <3..."
plot.df.fecal <- plotDistances2D(dlist=dist.lopresti.fecal,
relPhyseq = subset_samples(physeq.rel, sample_type=="stool"),
clrPhyseq = subset_samples(physeq.clr, sample_type=="stool"),
csnPhyseq = subset_samples(physeq.CSN, sample_type=="stool"),
nzcompPhyseq = subset_samples(physeq.NZcomp, sample_type=="stool"))
## [1] "Unifrac"
## [1] "Aitchison"
## [1] "Bray"
## [1] "Canberra"
# Plot
ggplot(plot.df.fecal, aes(Axis.1, Axis.2, color=host_disease))+
geom_point(size=4, alpha=0.5) + scale_color_manual(values = c('blue', 'red'))+
facet_wrap(distance~., scales='free', nrow=1)+
theme_bw()+
theme(strip.text.x = element_text(size=20))+
labs(color="Disease", title="Fecal samples")
# ggsave(file.path(path.plots, "distances4_MDS_stool.jpg"), height = 4, width = 15)
#________________
# SIGMOID DATA
#________________
# Get the distances & the plot data
dist.lopresti.sigmoid <- getDistances(relPhyseq = subset_samples(physeq.rel, sample_type=="sigmoid"),
clrPhyseq = subset_samples(physeq.clr, sample_type=="sigmoid"),
csnPhyseq = subset_samples(physeq.CSN, sample_type=="sigmoid"),
nzcompPhyseq = subset_samples(physeq.NZcomp, sample_type=="sigmoid"))
## nb samples relPhyseq: 11
## nb samples clrPhyseq: 11
## nb samples csnPhyseq: 11
## nb samples nzcompPhyseq: 11
## [1] "Unifrac..."
## [1] "Aitchison..."
## [1] "Bray des bois..."
## [1] "Canberra <3..."
plot.df.sigmoid <- plotDistances2D(dlist=dist.lopresti.sigmoid,
relPhyseq = subset_samples(physeq.rel, sample_type=="sigmoid"),
clrPhyseq = subset_samples(physeq.clr, sample_type=="sigmoid"),
csnPhyseq = subset_samples(physeq.CSN, sample_type=="sigmoid"),
nzcompPhyseq = subset_samples(physeq.NZcomp, sample_type=="sigmoid"))
## [1] "Unifrac"
## [1] "Aitchison"
## [1] "Bray"
## [1] "Canberra"
# Plot
ggplot(plot.df.sigmoid, aes(Axis.1, Axis.2, color=host_disease))+
geom_point(size=4, alpha=0.5) + scale_color_manual(values = c('blue', 'red'))+
facet_wrap(distance~., scales='free', nrow=1)+
theme_bw()+
theme(strip.text.x = element_text(size=20))+
labs(color="Disease", title="Sigmoid samples")
# ggsave(file.path(path.plots, "distances4_MDS_sigmoid.jpg"), height = 4, width = 15)
#________________
# ALL DATA
#________________
# Get the distances & the plot data
dist.lopresti <- getDistances()
## nb samples relPhyseq: 57
## nb samples clrPhyseq: 57
## nb samples csnPhyseq: 57
## nb samples nzcompPhyseq: 57
## [1] "Unifrac..."
## [1] "Aitchison..."
## [1] "Bray des bois..."
## [1] "Canberra <3..."
plot.df <- plotDistances2D(dlist=dist.lopresti)
## [1] "Unifrac"
## [1] "Aitchison"
## [1] "Bray"
## [1] "Canberra"
# Plot
p1 <- ggplot(plot.df, aes(Axis.1, Axis.2, color=host_disease))+
geom_point(size=4, alpha=0.5) + scale_color_manual(values = c('blue', 'red'))+
facet_wrap(distance~., scales='free', nrow=1)+
theme_bw()+
theme(strip.text.x = element_text(size=20),
axis.title.x = element_blank())+
labs(color="Disease")
p2 <- ggplot(plot.df, aes(Axis.1, Axis.2, color=sample_type))+
geom_line(aes(group=host_ID), color="black", lwd=0.1)+
geom_point(size=4, alpha=0.5) + scale_color_manual(values = c('#33CC00', 'purple'))+
facet_wrap(distance~., scales='free', nrow=1)+
theme_bw()+
theme(strip.text.x = element_blank())+
labs(color="Sample type")
ggpubr::ggarrange(p1,p2, nrow=2)
# ggsave(file.path(path.plots, "distances4_MDS_all.jpg"), height = 8, width = 15)
For better visualization, we will also take a glance at reduction to 3D.
#____________________________________________________________________________________
# Plot 3D ordination
plotDistances3D <- function(d, name_dist, physeq=physeq.lopresti){
# Reset parameters
mds.3D <- NULL
xyz <- NULL
fig.3D <- NULL
# Reduce distance matrix to 3 dimensions
set.seed(123)
mds.3D <- metaMDS(d, method="MDS", k=3, trace = 0)
xyz <- scores(mds.3D, display="sites") # pull out the (x,y,z) coordinates
# Plot
fig.3D <- plot_ly(x=xyz[,1], y=xyz[,2], z=xyz[,3], type="scatter3d", mode="markers",
color=sample_data(physeq)$host_disease, colors = c("blue", "red"))%>%
layout(title = paste('MDS in 3D with', name_dist, 'distance', sep = ' '))
return(fig.3D)
}
Now let’s plot!
# Fecal samples
plotDistances3D(dist.lopresti.fecal$UniF, "UniFrac", subset_samples(physeq.lopresti, sample_type=="stool"))
plotDistances3D(dist.lopresti.fecal$Ait, "Aitchison", subset_samples(physeq.lopresti, sample_type=="stool"))
plotDistances3D(dist.lopresti.fecal$Canb, "Canberra", subset_samples(physeq.lopresti, sample_type=="stool"))
plotDistances3D(dist.lopresti.fecal$Bray, "Bray-Curtis", subset_samples(physeq.lopresti, sample_type=="stool"))
# Note: for Bray-Curtis, all the samples are accumulated next to each other at coordinates (0,0,0)
# For heatmaps: have group color
matcol <- data.frame(phenotype = sample_data(physeq.lopresti)[,"host_disease"],
host_subtype = sample_data(physeq.lopresti)[,"host_subtype"],
sample = sample_data(physeq.lopresti)[,"sample_type"])
# Function to get heatmap from the distances computed
plotHeatmaps <- function(dlist, fontsize){
# Initialize variables
i=1
plist <- vector("list", 4)
names(plist) <- names(dlist)
# Loop through distances
for(d in dlist){
plist[[i]] <- pheatmap(as.matrix(d),
clustering_distance_rows = d,
clustering_distance_cols = d,
fontsize = fontsize,
show_rownames = F,
show_colnames = F,
annotation_col = matcol,
# annotation_row = matcol,
annotation_colors = list(host_disease = c('Healthy' = 'blue', 'IBS' = 'red'),
sample_type = c("stool" = "purple", 'sigmoid' = '#33CC00'),
host_subtype = c('HC'='black', 'IBS-M'='orange', 'IBS-C'='brown', 'IBS-D'='red')),
cluster_rows = T,
cluster_cols = T,
clustering_method = 'complete', # hc method
main = names(dlist)[i]) # have name of distance as title
i <- i+1
}
return(plist)
}
# Get the heatmaps
heatmp.lopresti <- plotHeatmaps(dlist = dist.lopresti, fontsize = 8)
# Figure 1B and 3B
phylum.table$host_disease <- factor(phylum.table$host_disease, levels = c('IBS', 'Healthy')) # have same order as paper
phylum.table$sample_type <- factor(phylum.table$sample_type, levels = c('stool', 'sigmoid'))
ggplot(phylum.table, aes(x = host_disease, y = Abundance, fill = Phylum))+
geom_bar(stat = "identity", position = "fill") +
facet_wrap(~sample_type)+
theme_cowplot()+
theme(axis.text.x = element_text(size = 10),
axis.title.x = element_blank())+
labs(x = "Samples", y = "Relative abundance", title = "Stool samples")
For the fecal samples, we get a very similar plot as in the original paper (decrease Firmicutes in IBS, increase Bacteroidota, increase Verrucomicrobiota). For the sigmoid samples, comparison with the original paper is limited, as we discarded most of the samples (only 11 sigmoid mucosa samples left).
# Get relative abundance of families
genus.table <- physeq.lopresti %>%
tax_glom(taxrank = "Genus") %>%
transform_sample_counts(function(x) {x/sum(x)} ) %>%
psmelt()
# Check at genera present
table(genus.table$Genus)
##
## Acidaminococcus Agathobacter
## 57 57
## Akkermansia Alistipes
## 57 57
## Alloprevotella Anaerostipes
## 57 57
## Anaerotruncus Bacteroides
## 57 57
## Barnesiella Blautia
## 57 57
## Butyricicoccus Butyricimonas
## 57 57
## CAG-873 Catenibacterium
## 57 57
## Cellulosilyticum Christensenellaceae_R-7_group
## 57 57
## Citrobacter Clostridium_sensu_stricto_1
## 57 57
## Coprobacter Coprococcus
## 57 57
## Dialister Dielma
## 57 57
## Dorea DTU089
## 57 57
## Eisenbergiella Erysipelatoclostridium
## 57 57
## Erysipelotrichaceae_UCG-003 Escherichia/Shigella
## 57 57
## Eubacterium Faecalibacterium
## 57 57
## Faecalitalea Family_XIII_AD3011_group
## 57 57
## Flavobacterium Flavonifractor
## 57 57
## Fusicatenibacter Fusobacterium
## 57 57
## Georgenia Granulicatella
## 57 57
## Haemophilus Hafnia-Obesumbacterium
## 57 57
## Holdemanella Holdemania
## 57 57
## Incertae_Sedis Lachnoclostridium
## 57 57
## Lachnospira Lachnospiraceae_NK4A136_group
## 57 57
## Lachnospiraceae_UCG-001 Lactobacillus
## 57 57
## Monoglobus Negativibacillus
## 57 57
## NK4A214_group Odoribacter
## 57 57
## Oscillibacter Oscillospira
## 57 57
## Parabacteroides Parasutterella
## 57 57
## Peptoclostridium Phascolarctobacterium
## 57 57
## Prevotella Prevotellaceae_NK3B31_group
## 57 57
## Prevotellaceae_UCG-003 Proteus
## 57 57
## Pseudoflavonifractor Pseudomonas
## 57 57
## Pyramidobacter Rikenellaceae_RC9_gut_group
## 57 57
## Robinsoniella Romboutsia
## 57 57
## Roseburia Ruminococcus
## 57 57
## Shuttleworthia Slackia
## 57 57
## Streptococcus Subdoligranulum
## 57 57
## Succiniclasticum Succinivibrio
## 57 57
## Sutterella Turicibacter
## 57 57
## Tuzzerella UBA1819
## 57 57
## UCG-002 UCG-003
## 57 57
## UCG-005
## 57
ggplot(genus.table %>% filter(Genus %in% c("Bacteroides", "Parabacteroides", "Pseudomonas", "Prevotella", "Anaerostipes", "Eubacterium", "Haemophilus")),
aes(x=host_disease, y=Abundance, fill=Genus))+
geom_boxplot(outlier.shape = NA, position=position_dodge(0.75))+
geom_point(position=position_dodge(0.75), aes(color=Genus))+
theme_cowplot()+
labs(x="", y="Relative abundance")